

***** combining
* Convert the "articles" sheet of every .xlsx workbook found in the
* "coded batches" folder into a .dta file with the same base name,
* saved in the current working directory.
clear
local workbooks: dir "coded batches" files "*.xlsx"

foreach wb of local workbooks {
	* Base name = workbook file name with the ".xlsx" extension removed.
	local stem: subinstr local wb ".xlsx" ""
	* The first spreadsheet row supplies the variable names.
	import excel "coded batches/`wb'", sheet("articles") firstrow clear
	save "`stem'.dta", replace
}

* Stack the converted batches into a single dataset: start from batch 1,
* then append batches 4 through 22 in ascending order (batches 2 and 3
* are not part of the combined file).
use "batch_1.dta", clear
forvalues b = 4/22 {
	append using "batch_`b'.dta"
}
save "all criteria.dta", replace

* Delete the intermediate per-batch .dta files (batch 1 and batches 4-22);
* the combined data stays in memory.
foreach b of numlist 1 4/22 {
	erase "batch_`b'.dta"
}


***** cleaning
* Remove rows that are exact duplicates across all variables.
duplicates drop

* Give the Swedish-named coding columns English names in one group rename
* (old names on the left map position-by-position to new names on the right).
rename (relevans ämne analysnivå kontext stil) (type topic level context style)

* The seven coded variables that are converted and recoded below.
local coded type topic level context format style geo

* Keep only the first character of each coded value and convert it to a
* number.
foreach v of varlist `coded' {
	replace `v' = substr(`v', 1, 1)
	destring `v', replace
}

* Merge categories before re-basing: type 3 joins 2, level 5 and 6 join 4,
* format 6 and 7 join 4.
replace type = 2 if type == 3
replace level = 4 if inlist(level, 5, 6)
replace format = 4 if inlist(format, 6, 7)

* Shift every code down by one so the lowest original code (1) becomes 0.
foreach v of varlist `coded' {
	replace `v' = `v' - 1
}

* After the shift, an original format code of 8 sits at 7; move it to 5 so
* it follows directly after the highest remaining format code (4).
replace format = 5 if format == 7

* Combine headline and body text into one field, separated by a space.
gen headl_text = headline + " " + text

* Drop rows that carry no text at all. Because of the separator, headl_text
* is " " (not "") when both headline and text are empty, so a plain
* missing(headl_text) never matches; trim first so those rows are removed.
drop if missing(trim(headl_text))

* Cleaned dataset that each per-criterion export reloads.
save "all_ids.dta", replace

* For each coded variable, write a training file "<variable>_train.csv"
* holding the combined text and that variable (renamed to "label"),
* leaving out rows where the label is missing. The cleaned dataset is
* reloaded at the start of every pass because keep/drop alter the data
* in memory.
foreach crit in type topic level context format style geo {
	use "all_ids.dta", clear
	keep headl_text `crit'
	rename `crit' label
	drop if missing(label)
	export delimited using "`crit'_train.csv", replace
}


* Remove the intermediate combined file written before cleaning.
erase "all criteria.dta"



















